% This code constructs the data to be used for analysis. 
% Requires the data to span the entire time group, e.g. starting from the 
% first month of the first year to last month of the last year.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
clear all
close all
clc

%% 0. Import data
Data_raw_file = '../Data/Data_Raw.xlsx';
Data_raw_m = readtable(Data_raw_file,'ReadVariableNames',1,'sheet','Monthly','basic',true);
Data_raw_q = readtable(Data_raw_file,'ReadVariableNames',1,'sheet','Quarterly','basic',true);
Data_raw_a = readtable(Data_raw_file,'ReadVariableNames',1,'sheet','Annual','basic',true);

addpath(genpath('../Utility'))

%% 1. INFLATION and POPULATION

%% 1.1 Inflation
% Quarterly average of CPI
cpi_m = Data_raw_m.cpi;%1871m1 to 2019m12
cpi_q = gen_avg(cpi_m , 3); %1871Q1 to 2019Q4
cpi_a = gen_avg(cpi_q , 4);%1871 to 2019, annual average

cpi_bench = mean(cpi_m(1693 : 1704));%2012

infl_q = log(cpi_q(2 : end)) - log(cpi_q(1 : end - 1));

infl_ma= [nan(4 , 1);
          movmean(infl_q , 4 , 'Endpoints' , 'discard')]; 
      
% Since inflation is essential, we organize data length based on available
% inflation data
l_m = length(cpi_m);

l_q = length(cpi_q);%For level
l_q_g = l_q - 1;%For growth rate

l_a = length(cpi_a);%For level

%% 1.2 Population
% Quarterly population combining NIPA and Cambridge in thousands
pop_interp = exp(fillmissing(log(Data_raw_q.pop_Cam),'linear'));
pop_q = splice_data([Data_raw_q.pop_NIPA(end - l_q + 1 : end) , pop_interp(end - l_q + 1 : end)]); %1871Q1 to 2019Q4 
pop_a = gen_avg(pop_q , 4);%1871 to 2019, annual average

%% 2. LOG REAL PER CAPITA CONSUMPTION 

%% 2.1 Use Annual Consumption
% Nominal consumption from NIPA Table 2.3.5
C_a_NIPA = 10^6 * (Data_raw_a.nd(end - l_a + 1 : end) + Data_raw_a.ser(end - l_a + 1 : end));%1929 to 2019
Cr_a_NIPA = C_a_NIPA./cpi_a * cpi_bench; %1929 to 2019
Crpc_a_NIPA = Cr_a_NIPA./pop_a;
cg_a_NIPA = log(Crpc_a_NIPA(2 : end)) - log(Crpc_a_NIPA(1 : end - 1));

% Real consumption/GDP per capita from Barro and Ursua (2010)
Crpc_a_BU = Data_raw_a.Crpc_BU(end - l_a + 1 : end);
C_a_BU = Crpc_a_BU .* pop_a .* cpi_a;
cg_a_BU = log(Crpc_a_BU(2 : end)) - log(Crpc_a_BU(1 : end - 1));

Grpc_a_BU = Data_raw_a.Grpc_BU(end - l_a + 1 : end);
G_a_BU = Grpc_a_BU .* pop_a .* cpi_a;
gg_a_BU = log(Grpc_a_BU(2 : end)) - log(Grpc_a_BU(1 : end - 1));

% Splice annual consumption growth
cg_a = splice_data([cg_a_NIPA , cg_a_BU]);

c_a = [flip(log(Crpc_a_NIPA(end)) - cumsum(flip(cg_a)));log(Crpc_a_NIPA(end))]; %1871 to 2019

%% 2.2 Use Quarterly Consumption
% Quarterly nominal consumption from NIPA Table 2.3.5
% Note that NIPA Table 2.3.5 S.A. series report annualized level
C_q_NIPA = 10^6 * (Data_raw_q.nd(end - l_q + 1 : end) + Data_raw_q.ser(end - l_q + 1 : end))/4;
Cr_q_NIPA = C_q_NIPA./cpi_q * cpi_bench;
Crpc_q_NIPA = Cr_q_NIPA./pop_q;
cg_q_NIPA = [nan(3 , 1);...%Account for one less observation for other growth rate
            (log(Crpc_q_NIPA(5 : end)) - log(Crpc_q_NIPA(1 : end - 4)))/4];

%% 2.3 Interpolating Annual Consumption Growth

%--------------------------------------------------------------------------
% Method 1: simple method, fourth of annual growth (used in the main paper)
%--------------------------------------------------------------------------
cg_q_NIPA_simple = kron(cg_a_NIPA/4 , ones(4 , 1));
cg_q_BU_simple = kron(cg_a_BU/4 , ones(4 , 1));
gg_q_BU_simple = kron(gg_a_BU/4 , ones(4 , 1));

%--------------------------------------------------------------------------
% Method 2: proportional Denton method (not used in the main paper)
%--------------------------------------------------------------------------
% [C_q_NIPA_denton , C_a_NIPA_trend]= interp_denton(C_a_NIPA , 4 , 1 , 2 , 1 , 2);
% Cr_q_NIPA_denton = C_q_NIPA_denton./cpi_q * cpi_bench;
% Crpc_q_NIPA_denton = Cr_q_NIPA_denton./pop_q;
% 
% [C_q_BU_denton , C_a_BU_trend] = interp_denton(C_a_BU , 4 , 1 , 2 , 1 , 2);
% Cr_q_BU_denton = C_q_BU_denton./cpi_q * cpi_bench;
% Crpc_q_BU_denton = Cr_q_BU_denton./pop_q;
% 
% [G_q_BU_denton , G_a_BU_trend] = interp_denton(G_a_BU , 4 , 1 , 2 , 1 , 2);
% Gr_q_BU_denton = G_q_BU_denton./cpi_q * cpi_bench;
% Grpc_q_BU_denton = Gr_q_BU_denton./pop_q;
% 
% cg_q_NIPA_denton = (log(Crpc_q_NIPA_denton(5 : end)) - log(Crpc_q_NIPA_denton(1 : end - 4)))/4;
% cg_q_BU_denton = (log(Crpc_q_BU_denton(5 : end)) - log(Crpc_q_BU_denton(1 : end - 4)))/4;
% gg_q_BU_denton = (log(Grpc_q_BU_denton(5 : end)) - log(Grpc_q_BU_denton(1 : end - 4)))/4;
% 
% % Treat outliers in proportional Denton method
% NIPA_denton_idx = find(isoutlier(cg_q_NIPA_denton , 'mean') == 1);
% BU_C_denton_idx = find(isoutlier(cg_q_BU_denton , 'mean') == 1);
% BU_G_denton_idx = find(isoutlier(gg_q_BU_denton , 'mean') == 1);
% 
% cg_q_NIPA_denton(NIPA_denton_idx) = cg_q_NIPA_simple(NIPA_denton_idx);
% cg_q_BU_denton(BU_C_denton_idx) = cg_q_BU_simple(BU_C_denton_idx);                
% gg_q_BU_denton(BU_G_denton_idx) = gg_q_BU_simple(BU_G_denton_idx);                


% Choose interpolation method to use
% use denton or simple
method_c_name = 'simple';

cg_q_NIPA_int = [nan(l_q_g - length(eval(['cg_q_NIPA_' , method_c_name])) , 1);...
                 eval(['cg_q_NIPA_' , method_c_name])];
                
cg_q_BU_int = [nan(l_q_g - length(eval(['cg_q_BU_' , method_c_name])) , 1);...
               eval(['cg_q_BU_' , method_c_name])];

gg_q_BU_int = [nan(l_q_g - length(eval(['gg_q_BU_' , method_c_name])) , 1);...
               eval(['gg_q_BU_' , method_c_name])]; 
           
%% 2.4 Splice Quarterly Consumption Growth
cg_q = splice_data([cg_q_NIPA , cg_q_NIPA_int , cg_q_BU_int]); 
         
% Quarterly real per capita consumption implied by quarterly growth rates
c_q = [flip(log(Crpc_q_NIPA(end)) - cumsum(flip(cg_q)));log(Crpc_q_NIPA(end))]; 

%% 3. LOG REAL PER CAPITA DIVIDEND
% Important: track units/whether annualized or not

D_m_CRSP = Data_raw_m.D(end - l_m + 1 : end);

%% 3.1 Use Annual Dividend
% CRSP
D_a_CRSP = gen_avg(D_m_CRSP , 12) * 12;
Dr_a_CRSP = D_a_CRSP./cpi_a * cpi_bench;
Drpc_a_CRSP = Dr_a_CRSP./pop_a;
dg_a_CRSP = log(Drpc_a_CRSP(2 : end)) - log(Drpc_a_CRSP(1 : end - 1));

% Wright (2004)
D_a_W = Data_raw_a.D_W(end - l_a + 1 : end);
Dr_a_W = D_a_W./cpi_a * cpi_bench;
Drpc_a_W = Dr_a_W./pop_a;
dg_a_W = log(Drpc_a_W(2 : end)) - log(Drpc_a_W(1 : end - 1));

% Piketty et al. (2018), Appendix I, TSA 6
D_a_PSZ = Data_raw_a.D_PSZ(end - l_a + 1 : end);
Dr_a_PSZ = D_a_PSZ./cpi_a * cpi_bench;
Drpc_a_PSZ = Dr_a_PSZ./pop_a;
dg_a_PSZ = log(Drpc_a_PSZ(2 : end)) - log(Drpc_a_PSZ(1 : end - 1)); 

% Splice annual dividend growth 
dg_a = splice_data([dg_a_CRSP , dg_a_PSZ , dg_a_W]);

% Implied annual log dividend levels
d_a = [flip(log(Drpc_a_CRSP(end)) - cumsum(flip(dg_a)));log(Drpc_a_CRSP(end))]; 

%% 3.2 Use Quarterly Dividend
% Construct quarterly real per capita dividend growth
D_q_CRSP = gen_avg(D_m_CRSP , 3) * 3;
Dr_q_CRSP = D_q_CRSP./cpi_q * cpi_bench;
Drpc_q_CRSP = Dr_q_CRSP./pop_q;%Per capita dividend in 2019Q4 dollar
dg_q_CRSP = [nan(3 , 1);...%Account for one less observation for other growth rate
            (log(Drpc_q_CRSP(5 : end)) - log(Drpc_q_CRSP(1 : end - 4)))/4];

%% 3.3 Interpolating Annual Dividend
%--------------------------------------------------------------------------
% Method 1: simple method, fourth of annual growth (used in the main paper)
%--------------------------------------------------------------------------
dg_q_W_simple = kron(dg_a_W/4 , ones(4 , 1));
dg_q_PSZ_simple = kron(dg_a_PSZ/4 , ones(4 , 1));

%--------------------------------------------------------------------------
% Method 2: proportional Denton method (not used in the main paper)
%--------------------------------------------------------------------------
% [D_q_W_denton , D_a_W_trend] = interp_denton(D_a_W , 4 , 1 , 2 , 1 , 2);
% Dr_q_W_denton = D_q_W_denton./cpi_q * cpi_bench;
% Drpc_q_W_denton = Dr_q_W_denton./pop_q;
% 
% [D_q_PSZ_denton , D_a_PSZ_trend] = interp_denton(D_a_PSZ , 4 , 1 , 2 , 1 , 2);
% Dr_q_PSZ_denton = D_q_PSZ_denton./cpi_q * cpi_bench;
% Drpc_q_PSZ_denton = Dr_q_PSZ_denton./pop_q;
% 
% dg_q_W_denton = (log(Drpc_q_W_denton(5 : end)) - log(Drpc_q_W_denton(1 : end - 4)))/4;%1901Q1 to 1929Q4
% dg_q_PSZ_denton = (log(Drpc_q_PSZ_denton(5 : end)) - log(Drpc_q_PSZ_denton(1 : end - 4)))/4;%1914Q1 to 2019Q4

% % Treat outliers. If more than 3 s.d. away, replace with a fourth of annual
% % growth
% W_denton_idx = find(isoutlier(dg_q_W_denton , 'mean') == 1);
% PSZ_denton_idx = find(isoutlier(dg_q_PSZ_denton , 'mean') == 1);
% 
% dg_q_W_denton(W_denton_idx) = dg_q_W_simple(W_denton_idx);
% dg_q_PSZ_denton(PSZ_denton_idx) = dg_q_PSZ_simple(PSZ_denton_idx);              

% Choose the interpolation method
method_d_name = 'simple';
dg_q_W_int = [nan(l_q_g - length(eval(['dg_q_W_' , method_d_name])) , 1);...
              eval(['dg_q_W_' , method_d_name])];         
              
dg_q_PSZ_int = [nan(l_q_g - length(eval(['dg_q_W_' , method_d_name])) , 1);...
                eval(['dg_q_PSZ_' , method_d_name])];    
                      
%% 3.4 Splice Quarterly Dividend Growth 
dg_q = splice_data([dg_q_CRSP , dg_q_PSZ_int , dg_q_W_int]);
dg_q_long = splice_data([dg_q , gg_q_BU_int]);
                            
% Implied log real per capita dividend
d_q = [flip(log(Drpc_q_CRSP(end)) - cumsum(flip(dg_q))); log(Drpc_q_CRSP(end))]; 

%% 4. PRICES

%% 4.1 Bond Yields
TB3SN_m = Data_raw_m.TB3SN(end - l_m + 1 : end)/100; 
TB3MS_m = Data_raw_m.TB3MS(end - l_m + 1 : end)/100; 
TB1YCM_m = Data_raw_m.TB1YCM(end - l_m + 1 : end)/100;

TB3Fred_al_m = (1./(1 - TB3MS_m * (91/360))).^(365.25/91)-1;

% Splice 3-month tbill rates
TB3m_al_m = splice_data([TB3SN_m , TB3Fred_al_m]);

% Generate quarterly series
TB3m_al_q = TB3m_al_m(3 : 3 : end);
tb3m_al_q = log(1 + TB3m_al_q);% log 3-month tbill rates, annualized

% Ex post real risk-free rate
tb3mreal_ep_q = tb3m_al_q(1 : end - 1)/4 - infl_q(end - l_q + 2 : end);

% Ex ante real risk-free rate using inflation expectations
tb3mreal_ea_q = tb3m_al_q/4 - log(1 + Data_raw_q.expinfl(end - l_q + 1 : end));% Ex ante

% Ex ante real risk-free rate as in Beeler and Campell (2012)
RHS_BC = [tb3m_al_q(1 : end - 1)/4 , infl_ma(1 : end - 1)];
b_BC = olsgmmnan(tb3mreal_ep_q ,...%Expost real rf
                 RHS_BC ,...%Nominal rf and lagged MA of log inflation
                 1 , 0 , -1);
tb3mreal_ea_q_BC = [ones(l_q , 1) , tb3m_al_q/4 , infl_ma] * b_BC;

%% 4.2 Returns
RetCRSP_m = Data_raw_m.vwretd(end - l_m + 1 : end); 
RetSchwert_m = Data_raw_m.Rm_schwert(end - l_m + 1 : end); 

% Splice stock return series
stockRet_m = splice_data([RetCRSP_m , RetSchwert_m]);

% Quarterly values
Ret_q = exp(gen_avg(log(1 + stockRet_m) , 3) * 3) - 1;
ret_q = log(1 + Ret_q);% log returns
retreal_q = ret_q(2 : end) - infl_q;%1871Q2 to 2019Q4, realized

% Log excess returns
exret_q = ret_q(2 : end) - tb3m_al_q(1 : end - 1)/4;

%% 5. DATA for PREDICTIVE REGRESSIONS

%% 5.1 Experienced Growth
gain_q = 0.018;
expd = [nan(l_q - (l_q_g - nonnan_first(dg_q_long) + 1) , 1);...
       expweightedavg(dg_q_long(nonnan_first(dg_q_long) : end) , gain_q)];
   
expr = [nan(l_q - (l_q_g - nonnan_first(retreal_q) + 1) , 1);...
       expweightedavg(retreal_q(nonnan_first(retreal_q) : end) , gain_q)];

%% 5.2 P/D Ratio
% Without repurchase adjustment
D_m_idx= Data_raw_m.D_idx(end - l_m + 1 : end);
D_q_idx = gen_avg(D_m_idx , 3) * 3;
D_q_idx_TTM = [nan(3 , 1); ...
               movsum(D_q_idx , 4 , 'Endpoints' , 'discard')];
pd_q_ttm = log(Data_raw_m.totval(end - 3 * length(D_q_idx_TTM) + 3 : 3 : end)./D_q_idx_TTM); 

% With repurchase adjustment
D_q_TTM = [nan(3 , 1); ...
           movsum(D_q_CRSP , 4 , 'Endpoints' , 'discard')];

pd_q_ttm_2 = log(Data_raw_m.mcaptot(end - 3 * length(D_q_TTM) + 3 : 3 : end)./D_q_TTM); 
       
%% 6. OUTPUT DATA

%% 6.1 Quarterly               
Quarterly_matrix = [Data_raw_q.yyyy(end - l_q + 1 : end) , Data_raw_q.qtr(end - l_q + 1 : end) , pop_q , [nan;infl_q] ,...
                    Data_raw_q.expinfl(end - l_q + 1 : end) , Data_raw_q.futretmkt(end - l_q + 1 : end), Data_raw_q.treas1y(end - l_q + 1 : end), Data_raw_q.eqpremtreas(end - l_q + 1 : end) ,...
                    Data_raw_q.realltg(end - l_q + 1 : end) , Data_raw_q.realmtg(end - l_q + 1 : end) ,...
                    Ret_q , ...
                    Data_raw_q.retvol(end - l_q + 1 : end) ,...
                    TB3m_al_q ,[nan;tb3mreal_ep_q] , tb3mreal_ea_q , tb3mreal_ea_q_BC ,...
                    c_q ,...
                    d_q ,...
                    [nan; cg_q] ,...
                    [nan; dg_q_long]];   
               
T_q = array2table(Quarterly_matrix,...
     'VariableNames',{'yyyy','qtr','pop','infl',...
                      'inflexp', 'retexp' , 'TB1Y_Matched' , 'exretexp'...
                      'realltg' , 'realmtg' ,...
                      'Ret' , 'retvol' , ...
                      'TB3m_al', 'tb3mreal_ep' , 'tb3mreal_ea', 'tb3mreal_ea_BC' ,...
                      'c',...
                      'd',...
                      'cg',...
                      'dg'});    
 
%% 6.2 Annual      
Annual_matrix = [Data_raw_a.yyyy(end - l_a + 1 : end) ,...
                 c_a , d_a];   
                                   
T_a = array2table(Annual_matrix,...
     'VariableNames',{'yyyy','c', 'd'});
                         
%% 6.3 For Predictive Regressions
Pred_matrix = [Data_raw_q.yyyy(end - l_q + 1 : end) , Data_raw_q.qtr(end - l_q + 1 : end) ,...
               [nan;exret_q] , [nan;retreal_q] ,...               
               expd , expr ,...
               infl_ma ,...
               pd_q_ttm , pd_q_ttm_2];   

T_pred = array2table(Pred_matrix,...
     'VariableNames',{'yyyy','qtr',...
                      'exret','realret' ,...
                      'expd' , 'expr' ,...
                      'infl_ma' , 'pd' , 'pd_2'});

% Export data
data_file_name = '../Data/Data_Sum.xlsx';
writetable(T_q , data_file_name,'Sheet','Quarterly');
writetable(T_a , data_file_name,'Sheet','Annual');
writetable(T_pred , data_file_name,'Sheet','Pred');

